From 6ee6dbde9af835efb040a72146de3b60dbc87c08 Mon Sep 17 00:00:00 2001 From: "cl349@freefall.cl.cam.ac.uk" Date: Thu, 29 Jul 2004 13:04:03 +0000 Subject: [PATCH] bitkeeper revision 1.1108.38.1 (4108f5c3GAt0sOF9Rw25ssmCIA9QCg) support creating domains from Linux 2.6 --- .rootkeys | 6 + .../arch/xen/i386/mm/ioremap.c | 160 ++++ .../arch/xen/kernel/Makefile | 2 +- .../arch/xen/kernel/xen_proc.c | 18 + linux-2.6.7-xen-sparse/drivers/char/mem.c | 792 ++++++++++++++++++ linux-2.6.7-xen-sparse/drivers/xen/Makefile | 5 +- .../drivers/xen/privcmd/Makefile | 2 + .../drivers/xen/privcmd/privcmd.c | 230 +++++ .../include/asm-xen/asm-i386/pgalloc.h | 11 + .../include/asm-xen/proc_cmd.h | 63 ++ .../include/asm-xen/xen_proc.h | 13 + 11 files changed, 1299 insertions(+), 3 deletions(-) create mode 100644 linux-2.6.7-xen-sparse/arch/xen/kernel/xen_proc.c create mode 100644 linux-2.6.7-xen-sparse/drivers/char/mem.c create mode 100644 linux-2.6.7-xen-sparse/drivers/xen/privcmd/Makefile create mode 100644 linux-2.6.7-xen-sparse/drivers/xen/privcmd/privcmd.c create mode 100644 linux-2.6.7-xen-sparse/include/asm-xen/proc_cmd.h create mode 100644 linux-2.6.7-xen-sparse/include/asm-xen/xen_proc.h diff --git a/.rootkeys b/.rootkeys index 808ed7919c..739f8f7f5b 100644 --- a/.rootkeys +++ b/.rootkeys @@ -194,6 +194,8 @@ 40f56239pYRq5yshPTkv3ujXKc8K6g linux-2.6.7-xen-sparse/arch/xen/kernel/empty.c 40f56239sFcjHiIRmnObRIDF-zaeKQ linux-2.6.7-xen-sparse/arch/xen/kernel/process.c 40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.7-xen-sparse/arch/xen/kernel/reboot.c +3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.7-xen-sparse/arch/xen/kernel/xen_proc.c +4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.7-xen-sparse/drivers/char/mem.c 40f56239Dp_vMTgz8TEbvo1hjHGc3w linux-2.6.7-xen-sparse/drivers/xen/Makefile 40f56239Sfle6wGv5FS0wjS_HI150A linux-2.6.7-xen-sparse/drivers/xen/block/Kconfig 40f562395atl9x4suKGhPkjqLOXESg linux-2.6.7-xen-sparse/drivers/xen/block/Makefile @@ -207,6 +209,8 @@ 40f56239lrg_Ob0BJ8WBFS1zeg2CYw linux-2.6.7-xen-sparse/drivers/xen/net/Kconfig 40f56239Wd4k_ycG_mFsSO1r5xKdtQ linux-2.6.7-xen-sparse/drivers/xen/net/Makefile 405853f6nbeazrNyEWNHBuoSg2PiPA linux-2.6.7-xen-sparse/drivers/xen/net/network.c +4108f5c1ppFXVpQzCOAZ6xXYubsjKA linux-2.6.7-xen-sparse/drivers/xen/privcmd/Makefile +3e5a4e65IUfzzMu2kZFlGEB8-rpTaA linux-2.6.7-xen-sparse/drivers/xen/privcmd/privcmd.c 40f56239YAjS52QG2FIAQpHDZAdGHg linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/desc.h 4107adf1E5O4ztGHNGMzCCNhcvqNow linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h 40f5623anSzpuEHgiNmQ56fIRfCoaQ linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/e820.h @@ -255,7 +259,9 @@ 40f5623cb_pJrLt3h_nAzvJsjsV0rQ linux-2.6.7-xen-sparse/include/asm-xen/hypervisor-ifs/trace.h 40f5623cndVUFlkxpf7Lfx7xu8madQ linux-2.6.7-xen-sparse/include/asm-xen/multicall.h 40f5623cTZ80EwjWUBlh44A9F9i_Lg linux-2.6.7-xen-sparse/include/asm-xen/netif.h +3f108af1ylCIm82H052FVTfXACBHrw linux-2.6.7-xen-sparse/include/asm-xen/proc_cmd.h 40f5623cBiQhPHILVLrl3xa6bDBaRg linux-2.6.7-xen-sparse/include/asm-xen/xen.h +3f689063BoW-HWV3auUJ-OqXfcGArw linux-2.6.7-xen-sparse/include/asm-xen/xen_proc.h 40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.7-xen-sparse/mkbuildtree 40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Make.defs 3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile diff --git a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/ioremap.c b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/ioremap.c index ca843cb92c..7ac9d527da 100644 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/ioremap.c +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/ioremap.c @@ -117,6 +117,9 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag struct vm_struct * area; unsigned long offset, last_addr; + if (!(start_info.flags & SIF_PRIVILEGED)) + return NULL; + /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; if (!size || last_addr < phys_addr) @@ -316,3 +319,160 @@ void __init bt_iounmap(void *addr, unsigned long size) --nrpages; } } + + +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) + +/* These hacky macros avoid phys->machine translations. */ +#define __direct_pte(x) ((pte_t) { (x) } ) +#define __direct_mk_pte(page_nr,pgprot) \ + __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) +#define direct_mk_pte_phys(physpage, pgprot) \ + __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) + +static inline void direct_remap_area_pte(pte_t *pte, + unsigned long address, + unsigned long size, + mmu_update_t **v) +{ + unsigned long end; + + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + if (address >= end) + BUG(); + + do { + (*v)->ptr = virt_to_machine(pte); + (*v)++; + address += PAGE_SIZE; + pte++; + } while (address && (address < end)); +} + +static inline int direct_remap_area_pmd(struct mm_struct *mm, + pmd_t *pmd, + unsigned long address, + unsigned long size, + mmu_update_t **v) +{ + unsigned long end; + + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + if (address >= end) + BUG(); + do { + pte_t *pte = pte_alloc_kernel(mm, pmd, address); + if (!pte) + return -ENOMEM; + direct_remap_area_pte(pte, address, end - address, v); + + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address && (address < end)); + return 0; +} + +int __direct_remap_area_pages(struct mm_struct *mm, + unsigned long address, + unsigned long size, + mmu_update_t *v) +{ + pgd_t * dir; + unsigned long end = address + size; + + dir = pgd_offset(mm, address); + flush_cache_all(); + if (address >= end) + BUG(); + spin_lock(&mm->page_table_lock); + do { + pmd_t *pmd = pmd_alloc(mm, dir, address); + if (!pmd) + return -ENOMEM; + direct_remap_area_pmd(mm, pmd, address, end - address, &v); + address = (address + PGDIR_SIZE) & PGDIR_MASK; + dir++; + + } while (address && (address < end)); + spin_unlock(&mm->page_table_lock); + flush_tlb_all(); + return 0; +} + + +int direct_remap_area_pages(struct mm_struct *mm, + unsigned long address, + unsigned long machine_addr, + unsigned long size, + pgprot_t prot, + domid_t domid) +{ + int i; + unsigned long start_address; +#define MAX_DIRECTMAP_MMU_QUEUE 130 + mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v; + + if ( domid != 0 ) + { + u[0].val = (unsigned long)(domid<<16) & ~0xFFFFUL; + u[0].ptr = (unsigned long)(domid<< 0) & ~0xFFFFUL; + u[0].ptr |= MMU_EXTENDED_COMMAND; + u[0].val |= MMUEXT_SET_SUBJECTDOM; + v = w = &u[1]; + } + else + { + v = w = &u[0]; + } + + start_address = address; + + for( i = 0; i < size; i += PAGE_SIZE ) + { + if ( (v - u) == MAX_DIRECTMAP_MMU_QUEUE ) + { + /* Fill in the PTE pointers. */ + __direct_remap_area_pages( mm, + start_address, + address-start_address, + w); + + if ( HYPERVISOR_mmu_update(u, v - u, NULL) < 0 ) + return -EFAULT; + v = w; + start_address = address; + } + + /* + * Fill in the machine address: PTE ptr is done later by + * __direct_remap_area_pages(). + */ + v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot); + + machine_addr += PAGE_SIZE; + address += PAGE_SIZE; + v++; + } + + if ( v != w ) + { + /* get the ptep's filled in */ + __direct_remap_area_pages(mm, + start_address, + address-start_address, + w); + if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0) ) + return -EFAULT; + } + + return 0; +} + + +#endif /* CONFIG_XEN_PRIVILEGED_GUEST */ diff --git a/linux-2.6.7-xen-sparse/arch/xen/kernel/Makefile b/linux-2.6.7-xen-sparse/arch/xen/kernel/Makefile index 4f164b0bdc..576eb0fb98 100644 --- a/linux-2.6.7-xen-sparse/arch/xen/kernel/Makefile +++ b/linux-2.6.7-xen-sparse/arch/xen/kernel/Makefile @@ -9,4 +9,4 @@ $(obj)/vmlinux.lds.s: arch/$(ARCH)/$(XENARCH)/kernel/vmlinux.lds.s extra-y += vmlinux.lds.s -obj-y := ctrl_if.o process.o reboot.o empty.o +obj-y := ctrl_if.o process.o reboot.o xen_proc.o empty.o diff --git a/linux-2.6.7-xen-sparse/arch/xen/kernel/xen_proc.c b/linux-2.6.7-xen-sparse/arch/xen/kernel/xen_proc.c new file mode 100644 index 0000000000..9c06dcdd89 --- /dev/null +++ b/linux-2.6.7-xen-sparse/arch/xen/kernel/xen_proc.c @@ -0,0 +1,18 @@ + +#include +#include + +static struct proc_dir_entry *xen_base; + +struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode) +{ + if ( xen_base == NULL ) + if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL ) + panic("Couldn't create /proc/xen"); + return create_proc_entry(name, mode, xen_base); +} + +void remove_xen_proc_entry(const char *name) +{ + remove_proc_entry(name, xen_base); +} diff --git a/linux-2.6.7-xen-sparse/drivers/char/mem.c b/linux-2.6.7-xen-sparse/drivers/char/mem.c new file mode 100644 index 0000000000..5dba3aee46 --- /dev/null +++ b/linux-2.6.7-xen-sparse/drivers/char/mem.c @@ -0,0 +1,792 @@ +/* + * linux/drivers/char/mem.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Added devfs support. + * Jan-11-1998, C. Scott Ananian + * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar + * + * MODIFIED FOR XEN by Keir Fraser, 10th July 2003. + * Linux running on Xen has strange semantics for /dev/mem and /dev/kmem!! + * 1. mmap will not work on /dev/kmem + * 2. mmap on /dev/mem interprets the 'file offset' as a machine address + * rather than a physical address. + * I don't believe anyone sane mmaps /dev/kmem, but /dev/mem is mmapped + * to get at memory-mapped I/O spaces (eg. the VESA X server does this). + * For this to work at all we need to expect machine addresses. + * Reading/writing of /dev/kmem expects kernel virtual addresses, as usual. + * Reading/writing of /dev/mem expects 'physical addresses' as usual -- this + * is because /dev/mem can only read/write existing kernel mappings, which + * will be normal RAM, and we should present pseudo-physical layout for all + * except I/O (which is the sticky case that mmap is hacked to deal with). + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_IA64 +# include +#endif + +#ifdef CONFIG_FB +extern void fbmem_init(void); +#endif +#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR) +extern void tapechar_init(void); +#endif + +/* + * Architectures vary in how they handle caching for addresses + * outside of main memory. + * + */ +static inline int uncached_access(struct file *file, unsigned long addr) +{ +#if defined(__i386__) + /* + * On the PPro and successors, the MTRRs are used to set + * memory types for physical addresses outside main memory, + * so blindly setting PCD or PWT on those pages is wrong. + * For Pentiums and earlier, the surround logic should disable + * caching for the high addresses through the KEN pin, but + * we maintain the tradition of paranoia in this code. + */ + if (file->f_flags & O_SYNC) + return 1; + return !( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability) ) + && addr >= __pa(high_memory); +#elif defined(__x86_64__) + /* + * This is broken because it can generate memory type aliases, + * which can cause cache corruptions + * But it is only available for root and we have to be bug-to-bug + * compatible with i386. + */ + if (file->f_flags & O_SYNC) + return 1; + /* same behaviour as i386. PAT always set to cached and MTRRs control the + caching behaviour. + Hopefully a full PAT implementation will fix that soon. */ + return 0; +#elif defined(CONFIG_IA64) + /* + * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases. + */ + return !(efi_mem_attributes(addr) & EFI_MEMORY_WB); +#elif defined(CONFIG_PPC64) + /* On PPC64, we always do non-cacheable access to the IO hole and + * cacheable elsewhere. Cache paradox can checkstop the CPU and + * the high_memory heuristic below is wrong on machines with memory + * above the IO hole... Ah, and of course, XFree86 doesn't pass + * O_SYNC when mapping us to tap IO space. Surprised ? + */ + return !page_is_ram(addr); +#else + /* + * Accessing memory above the top the kernel knows about or through a file pointer + * that was marked O_SYNC will be done non-cached. + */ + if (file->f_flags & O_SYNC) + return 1; + return addr >= __pa(high_memory); +#endif +} + +#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE +static inline int valid_phys_addr_range(unsigned long addr, size_t *count) +{ + unsigned long end_mem; + + end_mem = __pa(high_memory); + if (addr >= end_mem) + return 0; + + if (*count > end_mem - addr) + *count = end_mem - addr; + + return 1; +} +#endif + +static ssize_t do_write_mem(void *p, unsigned long realp, + const char __user * buf, size_t count, loff_t *ppos) +{ + ssize_t written; + unsigned long copied; + + written = 0; +#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU)) + /* we don't have page 0 mapped on sparc and m68k.. */ + if (realp < PAGE_SIZE) { + unsigned long sz = PAGE_SIZE-realp; + if (sz > count) sz = count; + /* Hmm. Do something? */ + buf+=sz; + p+=sz; + count-=sz; + written+=sz; + } +#endif + copied = copy_from_user(p, buf, count); + if (copied) { + ssize_t ret = written + (count - copied); + + if (ret) + return ret; + return -EFAULT; + } + written += count; + *ppos += written; + return written; +} + + +/* + * This funcion reads the *physical* memory. The f_pos points directly to the + * memory location. + */ +static ssize_t read_mem(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + ssize_t read; + + if (!valid_phys_addr_range(p, &count)) + return -EFAULT; + read = 0; +#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU)) + /* we don't have page 0 mapped on sparc and m68k.. */ + if (p < PAGE_SIZE) { + unsigned long sz = PAGE_SIZE-p; + if (sz > count) + sz = count; + if (sz > 0) { + if (clear_user(buf, sz)) + return -EFAULT; + buf += sz; + p += sz; + count -= sz; + read += sz; + } + } +#endif + if (copy_to_user(buf, __va(p), count)) + return -EFAULT; + read += count; + *ppos += read; + return read; +} + +static ssize_t write_mem(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + + if (!valid_phys_addr_range(p, &count)) + return -EFAULT; + return do_write_mem(__va(p), p, buf, count, ppos); +} + +#if !defined(CONFIG_XEN) +static int mmap_mem(struct file * file, struct vm_area_struct * vma) +{ + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + int uncached; + + uncached = uncached_access(file, offset); +#ifdef pgprot_noncached + if (uncached) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +#endif + + /* Don't try to swap out physical pages.. */ + vma->vm_flags |= VM_RESERVED; + + /* + * Don't dump addresses that are not real memory to a core file. + */ + if (uncached) + vma->vm_flags |= VM_IO; + + if (remap_page_range(vma, vma->vm_start, offset, vma->vm_end-vma->vm_start, + vma->vm_page_prot)) + return -EAGAIN; + return 0; +} +#elif !defined(CONFIG_XEN_PRIVILEGED_GUEST) +static int mmap_mem(struct file * file, struct vm_area_struct * vma) +{ + return -ENXIO; +} +#else +static int mmap_mem(struct file * file, struct vm_area_struct * vma) +{ + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + + if (!(start_info.flags & SIF_PRIVILEGED)) + return -ENXIO; + + /* DONTCOPY is essential for Xen as copy_page_range is broken. */ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (direct_remap_area_pages(vma->vm_mm, vma->vm_start, offset, + vma->vm_end-vma->vm_start, vma->vm_page_prot, + (domid_t)file->private_data)) + return -EAGAIN; + return 0; +} + +static int ioctl_mem(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case _IO('M', 1): file->private_data = (void *)arg; break; + default: return -ENOSYS; + } + return 0; +} +#endif /* CONFIG_XEN */ + +extern long vread(char *buf, char *addr, unsigned long count); +extern long vwrite(char *buf, char *addr, unsigned long count); + +/* + * This function reads the *virtual* memory as seen by the kernel. + */ +static ssize_t read_kmem(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + ssize_t read = 0; + ssize_t virtr = 0; + char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ + + if (p < (unsigned long) high_memory) { + read = count; + if (count > (unsigned long) high_memory - p) + read = (unsigned long) high_memory - p; + +#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU)) + /* we don't have page 0 mapped on sparc and m68k.. */ + if (p < PAGE_SIZE && read > 0) { + size_t tmp = PAGE_SIZE - p; + if (tmp > read) tmp = read; + if (clear_user(buf, tmp)) + return -EFAULT; + buf += tmp; + p += tmp; + read -= tmp; + count -= tmp; + } +#endif + if (copy_to_user(buf, (char *)p, read)) + return -EFAULT; + p += read; + buf += read; + count -= read; + } + + if (count > 0) { + kbuf = (char *)__get_free_page(GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + while (count > 0) { + int len = count; + + if (len > PAGE_SIZE) + len = PAGE_SIZE; + len = vread(kbuf, (char *)p, len); + if (!len) + break; + if (copy_to_user(buf, kbuf, len)) { + free_page((unsigned long)kbuf); + return -EFAULT; + } + count -= len; + buf += len; + virtr += len; + p += len; + } + free_page((unsigned long)kbuf); + } + *ppos = p; + return virtr + read; +} + +/* + * This function writes to the *virtual* memory as seen by the kernel. + */ +static ssize_t write_kmem(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + ssize_t wrote = 0; + ssize_t virtr = 0; + ssize_t written; + char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ + + if (p < (unsigned long) high_memory) { + + wrote = count; + if (count > (unsigned long) high_memory - p) + wrote = (unsigned long) high_memory - p; + + written = do_write_mem((void*)p, p, buf, wrote, ppos); + if (written != wrote) + return written; + wrote = written; + p += wrote; + buf += wrote; + count -= wrote; + } + + if (count > 0) { + kbuf = (char *)__get_free_page(GFP_KERNEL); + if (!kbuf) + return wrote ? wrote : -ENOMEM; + while (count > 0) { + int len = count; + + if (len > PAGE_SIZE) + len = PAGE_SIZE; + if (len) { + written = copy_from_user(kbuf, buf, len); + if (written) { + ssize_t ret; + + free_page((unsigned long)kbuf); + ret = wrote + virtr + (len - written); + return ret ? ret : -EFAULT; + } + } + len = vwrite(kbuf, (char *)p, len); + count -= len; + buf += len; + virtr += len; + p += len; + } + free_page((unsigned long)kbuf); + } + + *ppos = p; + return virtr + wrote; +} + +#if defined(CONFIG_ISA) || !defined(__mc68000__) +static ssize_t read_port(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + unsigned long i = *ppos; + char __user *tmp = buf; + + if (verify_area(VERIFY_WRITE,buf,count)) + return -EFAULT; + while (count-- > 0 && i < 65536) { + if (__put_user(inb(i),tmp) < 0) + return -EFAULT; + i++; + tmp++; + } + *ppos = i; + return tmp-buf; +} + +static ssize_t write_port(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + unsigned long i = *ppos; + const char __user * tmp = buf; + + if (verify_area(VERIFY_READ,buf,count)) + return -EFAULT; + while (count-- > 0 && i < 65536) { + char c; + if (__get_user(c, tmp)) + return -EFAULT; + outb(c,i); + i++; + tmp++; + } + *ppos = i; + return tmp-buf; +} +#endif + +static ssize_t read_null(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + return 0; +} + +static ssize_t write_null(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + return count; +} + +#ifdef CONFIG_MMU +/* + * For fun, we are using the MMU for this. + */ +static inline size_t read_zero_pagealigned(char __user * buf, size_t size) +{ + struct mm_struct *mm; + struct vm_area_struct * vma; + unsigned long addr=(unsigned long)buf; + + mm = current->mm; + /* Oops, this was forgotten before. -ben */ + down_read(&mm->mmap_sem); + + /* For private mappings, just map in zero pages. */ + for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { + unsigned long count; + + if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0) + goto out_up; + if (vma->vm_flags & VM_SHARED) + break; + count = vma->vm_end - addr; + if (count > size) + count = size; + + zap_page_range(vma, addr, count, NULL); + zeromap_page_range(vma, addr, count, PAGE_COPY); + + size -= count; + buf += count; + addr += count; + if (size == 0) + goto out_up; + } + + up_read(&mm->mmap_sem); + + /* The shared case is hard. Let's do the conventional zeroing. */ + do { + unsigned long unwritten = clear_user(buf, PAGE_SIZE); + if (unwritten) + return size + unwritten - PAGE_SIZE; + cond_resched(); + buf += PAGE_SIZE; + size -= PAGE_SIZE; + } while (size); + + return size; +out_up: + up_read(&mm->mmap_sem); + return size; +} + +static ssize_t read_zero(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + unsigned long left, unwritten, written = 0; + + if (!count) + return 0; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + left = count; + + /* do we want to be clever? Arbitrary cut-off */ + if (count >= PAGE_SIZE*4) { + unsigned long partial; + + /* How much left of the page? */ + partial = (PAGE_SIZE-1) & -(unsigned long) buf; + unwritten = clear_user(buf, partial); + written = partial - unwritten; + if (unwritten) + goto out; + left -= partial; + buf += partial; + unwritten = read_zero_pagealigned(buf, left & PAGE_MASK); + written += (left & PAGE_MASK) - unwritten; + if (unwritten) + goto out; + buf += left & PAGE_MASK; + left &= ~PAGE_MASK; + } + unwritten = clear_user(buf, left); + written += left - unwritten; +out: + return written ? written : -EFAULT; +} + +static int mmap_zero(struct file * file, struct vm_area_struct * vma) +{ + if (vma->vm_flags & VM_SHARED) + return shmem_zero_setup(vma); + if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot)) + return -EAGAIN; + return 0; +} +#else /* CONFIG_MMU */ +static ssize_t read_zero(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + size_t todo = count; + + while (todo) { + size_t chunk = todo; + + if (chunk > 4096) + chunk = 4096; /* Just for latency reasons */ + if (clear_user(buf, chunk)) + return -EFAULT; + buf += chunk; + todo -= chunk; + cond_resched(); + } + return count; +} + +static int mmap_zero(struct file * file, struct vm_area_struct * vma) +{ + return -ENOSYS; +} +#endif /* CONFIG_MMU */ + +static ssize_t write_full(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + return -ENOSPC; +} + +/* + * Special lseek() function for /dev/null and /dev/zero. Most notably, you + * can fopen() both devices with "a" now. This was previously impossible. + * -- SRB. + */ + +static loff_t null_lseek(struct file * file, loff_t offset, int orig) +{ + return file->f_pos = 0; +} + +/* + * The memory devices use the full 32/64 bits of the offset, and so we cannot + * check against negative addresses: they are ok. The return value is weird, + * though, in that case (0). + * + * also note that seeking relative to the "end of file" isn't supported: + * it has no meaning, so it returns -EINVAL. + */ +static loff_t memory_lseek(struct file * file, loff_t offset, int orig) +{ + loff_t ret; + + down(&file->f_dentry->d_inode->i_sem); + switch (orig) { + case 0: + file->f_pos = offset; + ret = file->f_pos; + force_successful_syscall_return(); + break; + case 1: + file->f_pos += offset; + ret = file->f_pos; + force_successful_syscall_return(); + break; + default: + ret = -EINVAL; + } + up(&file->f_dentry->d_inode->i_sem); + return ret; +} + +static int open_port(struct inode * inode, struct file * filp) +{ + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +#define mmap_kmem mmap_mem +#define zero_lseek null_lseek +#define full_lseek null_lseek +#define write_zero write_null +#define read_full read_zero +#define open_mem open_port +#define open_kmem open_mem + +static struct file_operations mem_fops = { + .llseek = memory_lseek, + .read = read_mem, + .write = write_mem, + .mmap = mmap_mem, + .open = open_mem, +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) + .ioctl = ioctl_mem, +#endif +}; + +static struct file_operations kmem_fops = { + .llseek = memory_lseek, + .read = read_kmem, + .write = write_kmem, +#if !defined(CONFIG_XEN) + .mmap = mmap_kmem, +#endif + .open = open_kmem, +}; + +static struct file_operations null_fops = { + .llseek = null_lseek, + .read = read_null, + .write = write_null, +}; + +#if defined(CONFIG_ISA) || !defined(__mc68000__) +static struct file_operations port_fops = { + .llseek = memory_lseek, + .read = read_port, + .write = write_port, + .open = open_port, +}; +#endif + +static struct file_operations zero_fops = { + .llseek = zero_lseek, + .read = read_zero, + .write = write_zero, + .mmap = mmap_zero, +}; + +static struct file_operations full_fops = { + .llseek = full_lseek, + .read = read_full, + .write = write_full, +}; + +static ssize_t kmsg_write(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + char *tmp; + int ret; + + tmp = kmalloc(count + 1, GFP_KERNEL); + if (tmp == NULL) + return -ENOMEM; + ret = -EFAULT; + if (!copy_from_user(tmp, buf, count)) { + tmp[count] = 0; + ret = printk("%s", tmp); + } + kfree(tmp); + return ret; +} + +static struct file_operations kmsg_fops = { + .write = kmsg_write, +}; + +static int memory_open(struct inode * inode, struct file * filp) +{ + switch (iminor(inode)) { + case 1: + filp->f_op = &mem_fops; + break; + case 2: + filp->f_op = &kmem_fops; + break; + case 3: + filp->f_op = &null_fops; + break; +#if defined(CONFIG_ISA) || !defined(__mc68000__) + case 4: + filp->f_op = &port_fops; + break; +#endif + case 5: + filp->f_op = &zero_fops; + break; + case 7: + filp->f_op = &full_fops; + break; + case 8: + filp->f_op = &random_fops; + break; + case 9: + filp->f_op = &urandom_fops; + break; + case 11: + filp->f_op = &kmsg_fops; + break; + default: + return -ENXIO; + } + if (filp->f_op && filp->f_op->open) + return filp->f_op->open(inode,filp); + return 0; +} + +static struct file_operations memory_fops = { + .open = memory_open, /* just a selector for the real open */ +}; + +static const struct { + unsigned int minor; + char *name; + umode_t mode; + struct file_operations *fops; +} devlist[] = { /* list of minor devices */ + {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops}, + {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops}, + {3, "null", S_IRUGO | S_IWUGO, &null_fops}, +#if defined(CONFIG_ISA) || !defined(__mc68000__) + {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops}, +#endif + {5, "zero", S_IRUGO | S_IWUGO, &zero_fops}, + {7, "full", S_IRUGO | S_IWUGO, &full_fops}, + {8, "random", S_IRUGO | S_IWUSR, &random_fops}, + {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops}, + {11,"kmsg", S_IRUGO | S_IWUSR, &kmsg_fops}, +}; + +static struct class_simple *mem_class; + +static int __init chr_dev_init(void) +{ + int i; + + if (register_chrdev(MEM_MAJOR,"mem",&memory_fops)) + printk("unable to get major %d for memory devs\n", MEM_MAJOR); + + mem_class = class_simple_create(THIS_MODULE, "mem"); + for (i = 0; i < ARRAY_SIZE(devlist); i++) { + class_simple_device_add(mem_class, + MKDEV(MEM_MAJOR, devlist[i].minor), + NULL, devlist[i].name); + devfs_mk_cdev(MKDEV(MEM_MAJOR, devlist[i].minor), + S_IFCHR | devlist[i].mode, devlist[i].name); + } + +#if defined (CONFIG_FB) + fbmem_init(); +#endif + return 0; +} + +fs_initcall(chr_dev_init); diff --git a/linux-2.6.7-xen-sparse/drivers/xen/Makefile b/linux-2.6.7-xen-sparse/drivers/xen/Makefile index 91240534c3..2b3219fd4e 100644 --- a/linux-2.6.7-xen-sparse/drivers/xen/Makefile +++ b/linux-2.6.7-xen-sparse/drivers/xen/Makefile @@ -1,6 +1,7 @@ -obj-y += evtchn/ -obj-y += console/ obj-y += block/ +obj-y += console/ +obj-y += evtchn/ obj-y += net/ +obj-y += privcmd/ diff --git a/linux-2.6.7-xen-sparse/drivers/xen/privcmd/Makefile b/linux-2.6.7-xen-sparse/drivers/xen/privcmd/Makefile new file mode 100644 index 0000000000..e21869553c --- /dev/null +++ b/linux-2.6.7-xen-sparse/drivers/xen/privcmd/Makefile @@ -0,0 +1,2 @@ + +obj-y := privcmd.o diff --git a/linux-2.6.7-xen-sparse/drivers/xen/privcmd/privcmd.c b/linux-2.6.7-xen-sparse/drivers/xen/privcmd/privcmd.c new file mode 100644 index 0000000000..3275dd015f --- /dev/null +++ b/linux-2.6.7-xen-sparse/drivers/xen/privcmd/privcmd.c @@ -0,0 +1,230 @@ +/****************************************************************************** + * core.c + * + * Interface to privileged domain-0 commands. + * + * Copyright (c) 2002-2004, K A Fraser, B Dragovic + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static struct proc_dir_entry *privcmd_intf; + +static int privcmd_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long data) +{ + int ret = -ENOSYS; + + switch ( cmd ) + { + case IOCTL_PRIVCMD_HYPERCALL: + { + privcmd_hypercall_t hypercall; + + if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) ) + return -EFAULT; + + __asm__ __volatile__ ( + "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; " + "movl 4(%%eax),%%ebx ;" + "movl 8(%%eax),%%ecx ;" + "movl 12(%%eax),%%edx ;" + "movl 16(%%eax),%%esi ;" + "movl 20(%%eax),%%edi ;" + "movl (%%eax),%%eax ;" + TRAP_INSTR "; " + "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx" + : "=a" (ret) : "0" (&hypercall) : "memory" ); + + } + break; + + case IOCTL_PRIVCMD_INITDOMAIN_EVTCHN: + { + extern int initdom_ctrlif_domcontroller_port; + ret = initdom_ctrlif_domcontroller_port; + } + break; + + + case IOCTL_PRIVCMD_MMAP: + { +#define PRIVCMD_MMAP_SZ 32 + privcmd_mmap_t mmapcmd; + privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ], *p; + int i, rc; + + if ( copy_from_user(&mmapcmd, (void *)data, sizeof(mmapcmd)) ) + return -EFAULT; + + p = mmapcmd.entry; + + for (i=0; iPRIVCMD_MMAP_SZ)? + PRIVCMD_MMAP_SZ:(mmapcmd.num-i); + if ( copy_from_user(&msg, p, n*sizeof(privcmd_mmap_entry_t)) ) + return -EFAULT; + + for ( j = 0; j < n; j++ ) + { + struct vm_area_struct *vma = + find_vma( current->mm, msg[j].va ); + + if ( !vma ) + return -EINVAL; + + if ( msg[j].va > PAGE_OFFSET ) + return -EINVAL; + + if ( (msg[j].va + (msg[j].npages< vma->vm_end ) + return -EINVAL; + + if ( (rc = direct_remap_area_pages(vma->vm_mm, + msg[j].va&PAGE_MASK, + msg[j].mfn<vm_page_prot, + mmapcmd.dom)) < 0 ) + return rc; + } + } + ret = 0; + } + break; + + case IOCTL_PRIVCMD_MMAPBATCH: + { +#define MAX_DIRECTMAP_MMU_QUEUE 130 + mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v; + privcmd_mmapbatch_t m; + struct vm_area_struct *vma = NULL; + unsigned long *p, addr; + unsigned long mfn; + int i; + + if ( copy_from_user(&m, (void *)data, sizeof(m)) ) + { ret = -EFAULT; goto batch_err; } + + vma = find_vma( current->mm, m.addr ); + + if ( !vma ) + { ret = -EINVAL; goto batch_err; } + + if ( m.addr > PAGE_OFFSET ) + { ret = -EFAULT; goto batch_err; } + + if ( (m.addr + (m.num< vma->vm_end ) + { ret = -EFAULT; goto batch_err; } + + if ( m.dom != 0 ) + { + u[0].val = (unsigned long)(m.dom<<16) & ~0xFFFFUL; + u[0].ptr = (unsigned long)(m.dom<< 0) & ~0xFFFFUL; + u[0].ptr |= MMU_EXTENDED_COMMAND; + u[0].val |= MMUEXT_SET_SUBJECTDOM; + v = w = &u[1]; + } + else + { + v = w = &u[0]; + } + + p = m.arr; + addr = m.addr; + for ( i = 0; i < m.num; i++, addr += PAGE_SIZE, p++ ) + { + if ( get_user(mfn, p) ) + return -EFAULT; + + v->val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot); + + __direct_remap_area_pages(vma->vm_mm, + addr, + PAGE_SIZE, + v); + + if ( unlikely(HYPERVISOR_mmu_update(u, v - u + 1, NULL) < 0) ) + put_user( 0xF0000000 | mfn, p ); + + v = w; + } + ret = 0; + break; + + batch_err: + printk("batch_err ret=%d vma=%p addr=%lx num=%d arr=%p %lx-%lx\n", + ret, vma, m.addr, m.num, m.arr, vma->vm_start, vma->vm_end); + break; + } + break; + + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int privcmd_mmap(struct file * file, struct vm_area_struct * vma) +{ + /* DONTCOPY is essential for Xen as copy_page_range is broken. */ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; + + return 0; +} + +static struct file_operations privcmd_file_ops = { + ioctl : privcmd_ioctl, + mmap: privcmd_mmap +}; + + +static int __init privcmd_init(void) +{ + if ( !(start_info.flags & SIF_PRIVILEGED) ) + return 0; + + privcmd_intf = create_xen_proc_entry("privcmd", 0400); + if ( privcmd_intf != NULL ) + { + privcmd_intf->owner = THIS_MODULE; + privcmd_intf->nlink = 1; + privcmd_intf->proc_fops = &privcmd_file_ops; + } + + return 0; +} + + +static void __exit privcmd_cleanup(void) +{ + if ( privcmd_intf == NULL ) return; + remove_xen_proc_entry("privcmd"); + privcmd_intf = NULL; +} + + +module_init(privcmd_init); +module_exit(privcmd_cleanup); diff --git a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgalloc.h b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgalloc.h index 422cdc5283..f27a7aadcf 100644 --- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgalloc.h +++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgalloc.h @@ -56,4 +56,15 @@ static inline void pte_free(struct page *pte) #define check_pgt_cache() do { } while (0) +int direct_remap_area_pages(struct mm_struct *mm, + unsigned long address, + unsigned long machine_addr, + unsigned long size, + pgprot_t prot, + domid_t domid); +int __direct_remap_area_pages(struct mm_struct *mm, + unsigned long address, + unsigned long size, + mmu_update_t *v); + #endif /* _I386_PGALLOC_H */ diff --git a/linux-2.6.7-xen-sparse/include/asm-xen/proc_cmd.h b/linux-2.6.7-xen-sparse/include/asm-xen/proc_cmd.h new file mode 100644 index 0000000000..08e452de15 --- /dev/null +++ b/linux-2.6.7-xen-sparse/include/asm-xen/proc_cmd.h @@ -0,0 +1,63 @@ +/****************************************************************************** + * proc_cmd.h + * + * Interface to /proc/cmd and /proc/xen/privcmd. + */ + +#ifndef __PROC_CMD_H__ +#define __PROC_CMD_H__ + +typedef struct privcmd_hypercall +{ + unsigned long op; + unsigned long arg[5]; +} privcmd_hypercall_t; + +typedef struct privcmd_mmap_entry { + unsigned long va; + unsigned long mfn; + unsigned long npages; +} privcmd_mmap_entry_t; + +typedef struct privcmd_mmap { + int num; + domid_t dom; /* target domain */ + privcmd_mmap_entry_t *entry; +} privcmd_mmap_t; + +typedef struct privcmd_mmapbatch { + int num; // number of pages to populate + domid_t dom; // target domain + unsigned long addr; // virtual address + unsigned long *arr; // array of mfns - top nibble set on err +} privcmd_mmapbatch_t; + +typedef struct privcmd_blkmsg +{ + unsigned long op; + void *buf; + int buf_size; +} privcmd_blkmsg_t; + +/* + * @cmd: IOCTL_PRIVCMD_HYPERCALL + * @arg: &privcmd_hypercall_t + * Return: Value returned from execution of the specified hypercall. + */ +#define IOCTL_PRIVCMD_HYPERCALL \ + _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t)) + +/* + * @cmd: IOCTL_PRIVCMD_INITDOMAIN_EVTCHN + * @arg: n/a + * Return: Port associated with domain-controller end of control event channel + * for the initial domain. + */ +#define IOCTL_PRIVCMD_INITDOMAIN_EVTCHN \ + _IOC(_IOC_NONE, 'P', 1, 0) +#define IOCTL_PRIVCMD_MMAP \ + _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t)) +#define IOCTL_PRIVCMD_MMAPBATCH \ + _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmapbatch_t)) + +#endif /* __PROC_CMD_H__ */ diff --git a/linux-2.6.7-xen-sparse/include/asm-xen/xen_proc.h b/linux-2.6.7-xen-sparse/include/asm-xen/xen_proc.h new file mode 100644 index 0000000000..d62791e95c --- /dev/null +++ b/linux-2.6.7-xen-sparse/include/asm-xen/xen_proc.h @@ -0,0 +1,13 @@ + +#ifndef __ASM_XEN_PROC_H__ +#define __ASM_XEN_PROC_H__ + +#include +#include + +extern struct proc_dir_entry *create_xen_proc_entry( + const char *name, mode_t mode); +extern void remove_xen_proc_entry( + const char *name); + +#endif /* __ASM_XEN_PROC_H__ */ -- 2.30.2